{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Importing"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from CB_IPO import scrape"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Initializing"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"scraper = scrape()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Scraping for Recent IPO Filings (S-1)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['Intuitive Machines, Inc. (LUNR, LUNRW) ', 'Nuburu, Inc. (BURU, BURU-WT) ', 'SOLIGENIX, INC. (SNGX) ', 'NovaBay Pharmaceuticals, Inc. (NBY) ', 'Kodiak Gas Services, Inc. ']\n",
"['2023-03-31', '2023-03-31', '2023-03-31', '2023-03-31']\n",
"{'S-1', 'S-1/A'}\n"
]
}
],
"source": [
"# Ask the scraper for 5 results; it hands back names, filing dates and form types.\n",
"company_names, file_dates, form_types = scraper.edgar_scrape(5)\n",
"\n",
"# Show all names, the dates with the first entry skipped, then the form types.\n",
"# NOTE(review): only the dates are sliced with [1:], so one fewer date than names is shown — confirm this is intended.\n",
"for result in (company_names, file_dates[1:], form_types):\n",
"    print(result)\n"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Formatting into a DataFrame"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" names | \n",
" filing date | \n",
"
\n",
" \n",
" \n",
" \n",
" 1 | \n",
" Nuburu, Inc. (BURU, BURU-WT) | \n",
" 2023-03-31 | \n",
"
\n",
" \n",
" 2 | \n",
" SOLIGENIX, INC. (SNGX) | \n",
" 2023-03-31 | \n",
"
\n",
" \n",
" 3 | \n",
" NovaBay Pharmaceuticals, Inc. (NBY) | \n",
" 2023-03-31 | \n",
"
\n",
" \n",
" 4 | \n",
" Kodiak Gas Services, Inc. | \n",
" 2023-03-31 | \n",
"
\n",
" \n",
" 5 | \n",
" Artificial Intelligence Technology Solutions I... | \n",
" 2023-03-31 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" names filing date\n",
"1 Nuburu, Inc. (BURU, BURU-WT) 2023-03-31\n",
"2 SOLIGENIX, INC. (SNGX) 2023-03-31\n",
"3 NovaBay Pharmaceuticals, Inc. (NBY) 2023-03-31\n",
"4 Kodiak Gas Services, Inc. 2023-03-31\n",
"5 Artificial Intelligence Technology Solutions I... 2023-03-31"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Build the table of names and filing dates, then display it without its first row.\n",
"# NOTE(review): the meaning of the (6, 1) arguments is not visible in this notebook — confirm against CB_IPO's generate_df.\n",
"df = scraper.generate_df(6, 1)\n",
"df.iloc[1:]"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Modifying Search Dates"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" names | \n",
" filing date | \n",
"
\n",
" \n",
" \n",
" \n",
" 1 | \n",
" Nuburu, Inc. (BURU, BURU-WT) | \n",
" 2023-03-31 | \n",
"
\n",
" \n",
" 2 | \n",
" SOLIGENIX, INC. (SNGX) | \n",
" 2023-03-31 | \n",
"
\n",
" \n",
" 3 | \n",
" NovaBay Pharmaceuticals, Inc. (NBY) | \n",
" 2023-03-31 | \n",
"
\n",
" \n",
" 4 | \n",
" Kodiak Gas Services, Inc. | \n",
" 2023-03-31 | \n",
"
\n",
" \n",
" 5 | \n",
" Artificial Intelligence Technology Solutions I... | \n",
" 2023-03-31 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" names filing date\n",
"1 Nuburu, Inc. (BURU, BURU-WT) 2023-03-31\n",
"2 SOLIGENIX, INC. (SNGX) 2023-03-31\n",
"3 NovaBay Pharmaceuticals, Inc. (NBY) 2023-03-31\n",
"4 Kodiak Gas Services, Inc. 2023-03-31\n",
"5 Artificial Intelligence Technology Solutions I... 2023-03-31"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Set the search window on the scraper, then scrape again with it in place.\n",
"scraper.set_search_date(\"2021-01-01\", \"2023-03-31\")\n",
"company_names, file_dates, form_types = scraper.edgar_scrape(5)\n",
"\n",
"# Rebuild the table and display it without its first row.\n",
"df = scraper.generate_df(6, 1)\n",
"df.iloc[1:]"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Modifying Form Types"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'10-Q', '10-Q/A', '10-K'}\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" names | \n",
" filing date | \n",
"
\n",
" \n",
" \n",
" \n",
" 1 | \n",
" GOLDEN GRAIN ENERGY | \n",
" 2021-12-30 | \n",
"
\n",
" \n",
" 2 | \n",
" Financial Gravity Companies, Inc. (FGCO) | \n",
" 2021-12-30 | \n",
"
\n",
" \n",
" 3 | \n",
" NEOGEN CORP (NEOG) | \n",
" 2021-12-30 | \n",
"
\n",
" \n",
" 4 | \n",
" EASTON PHARMACEUTICALS INC. | \n",
" 2021-12-30 | \n",
"
\n",
" \n",
" 5 | \n",
" Nestbuilder.com Corp. (NBLD) | \n",
" 2021-12-30 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" names filing date\n",
"1 GOLDEN GRAIN ENERGY 2021-12-30\n",
"2 Financial Gravity Companies, Inc. (FGCO) 2021-12-30\n",
"3 NEOGEN CORP (NEOG) 2021-12-30\n",
"4 EASTON PHARMACEUTICALS INC. 2021-12-30\n",
"5 Nestbuilder.com Corp. (NBLD) 2021-12-30"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# NOTE(review): reset_url() presumably clears the filters left by earlier cells — confirm against CB_IPO.\n",
"scraper.reset_url()\n",
"scraper.set_search_date(\"2021-12-30\", \"2022-01-01\")\n",
"scraper.add_forms(['10-K', '10-Q'])\n",
"\n",
"# Scrape with the new settings and show which form types came back.\n",
"company_names, file_dates, form_types = scraper.edgar_scrape(5)\n",
"print(form_types)\n",
"\n",
"# Rebuild the table and display it without its first row.\n",
"df = scraper.generate_df(6, 1)\n",
"df.iloc[1:]"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Generating Reference Codes and Accession Numbers"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['000007800323000024', '000007800322000027', '000007800321000038']\n",
"['pfe-20221231.htm', 'pfe-20211231.htm', 'pfe-20201231.htm']\n",
"PFIZER INC (PFE) \n"
]
}
],
"source": [
"# CIK (SEC Central Index Key) that identifies Pfizer.\n",
"cik = '0000078003'\n",
"\n",
"# Document references and the company name tied to this CIK.\n",
"# NOTE(review): the second argument (3) appears to limit how many filings are returned — the recorded output has three entries; confirm.\n",
"references, name = scraper.get_refs(cik, 3)\n",
"\n",
"# Accession numbers tied to the same CIK, requested with the same count.\n",
"accession_numbers = scraper.get_anums(cik, 3)\n",
"\n",
"# Print the accession numbers, then the references, then the company name.\n",
"for item in (accession_numbers, references, name):\n",
"    print(item)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Finding XBRL Links for a Company"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"https://www.sec.gov/ix?doc=/Archives/edgar/data/0000078003/000007800323000024/pfe-20221231.htm\n",
"https://www.sec.gov/ix?doc=/Archives/edgar/data/0000078003/000007800322000027/pfe-20211231.htm\n",
"https://www.sec.gov/ix?doc=/Archives/edgar/data/0000078003/000007800321000038/pfe-20201231.htm\n"
]
}
],
"source": [
"# Build the filing links for the CIK defined in an earlier cell; a company name is returned alongside them.\n",
"links, c_name = scraper.create_links(cik, 3)\n",
"\n",
"# Print one link per line.\n",
"for link in links:\n",
"    print(link)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Scraping a Link for Financial Info and Calculating Ratios"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total Assets: 197205.0\n",
"Total Liabilities: 101288.0\n",
"Total Equity: 95916.0\n",
"Current Assets: 51259.0\n",
"Current Liabilities: 42138\n",
"Net Income: 31372.0\n",
"Long Term Debt: 32884.0\n",
"Current Debt: 2945.0\n",
"Inventory: 8981.0\n",
"Registrant: PFIZER INC\n",
"\n",
"D/E: 1.0560073397556196\n",
"ROE: 0.3270778597939864\n",
"Working Capital: 1.216455455883051\n",
"Quick: 1.0033224168209218\n",
"TD/TA: 0.181684034380467\n",
"ROA: 0.1590831875459547\n",
"\n"
]
}
],
"source": [
"# Work with the first link in the list.\n",
"link = links[0]\n",
"\n",
"# Scrape the filing's figures and print one 'name: value' line per entry.\n",
"Financials = scraper.scrape_xbrl(link)\n",
"for account, amount in Financials.items():\n",
"    print(f'{account}: {amount!s}')\n",
"print()\n",
"\n",
"# Derive the ratios from those figures and print them in the same format.\n",
"ratios = scraper.calculate_ratios(Financials)\n",
"for ratio_name, ratio_value in ratios.items():\n",
"    print(f'{ratio_name}: {ratio_value!s}')\n",
"print()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### 10-K DataFrame Summary"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Account | \n",
" Amount | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Total Assets | \n",
" 197205.0 | \n",
"
\n",
" \n",
" 1 | \n",
" Total Liabilities | \n",
" 101288.0 | \n",
"
\n",
" \n",
" 2 | \n",
" Total Equity | \n",
" 95916.0 | \n",
"
\n",
" \n",
" 3 | \n",
" Current Assets | \n",
" 51259.0 | \n",
"
\n",
" \n",
" 4 | \n",
" Current Liabilities | \n",
" 42138 | \n",
"
\n",
" \n",
" 5 | \n",
" Net Income | \n",
" 31372.0 | \n",
"
\n",
" \n",
" 6 | \n",
" Long Term Debt | \n",
" 32884.0 | \n",
"
\n",
" \n",
" 7 | \n",
" Current Debt | \n",
" 2945.0 | \n",
"
\n",
" \n",
" 8 | \n",
" Inventory | \n",
" 8981.0 | \n",
"
\n",
" \n",
" 9 | \n",
" Registrant | \n",
" PFIZER INC | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Account Amount\n",
"0 Total Assets 197205.0\n",
"1 Total Liabilities 101288.0\n",
"2 Total Equity 95916.0\n",
"3 Current Assets 51259.0\n",
"4 Current Liabilities 42138\n",
"5 Net Income 31372.0\n",
"6 Long Term Debt 32884.0\n",
"7 Current Debt 2945.0\n",
"8 Inventory 8981.0\n",
"9 Registrant PFIZER INC"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Ratio | \n",
" Value | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Quick | \n",
" 1.003322 | \n",
"
\n",
" \n",
" 1 | \n",
" Working Capital | \n",
" 1.216455 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Ratio Value\n",
"0 Quick 1.003322\n",
"1 Working Capital 1.216455"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Ratio | \n",
" Value | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" D/E | \n",
" 1.056007 | \n",
"
\n",
" \n",
" 1 | \n",
" TD/TA | \n",
" 0.181684 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Ratio Value\n",
"0 D/E 1.056007\n",
"1 TD/TA 0.181684"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Request three summaries of the same filing, one per flag: raw figures, liquidity ratios, leverage ratios.\n",
"raw_df, ratio_df, lev_df = (\n",
"    scraper.summarize_10k(link, flag=view)\n",
"    for view in ('raw', 'liquidity', 'leverage')\n",
")\n",
"\n",
"# Show each table, with a blank line printed between consecutive tables.\n",
"display(raw_df)\n",
"for table in (ratio_df, lev_df):\n",
"    print()\n",
"    display(table)\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.6"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}